from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
import time
import random

# Module-level file handles, opened for the lifetime of the process.
# `urls` is consumed lazily by the class body of TestSpider below to build
# start_urls; `terms` is the append-mode sink for scraped link texts.
# NOTE(review): neither handle is ever closed here (no `with` possible —
# the spider class depends on these staying open across its definition
# and crawl); relies on interpreter exit to flush/close. Both files are
# resolved relative to the current working directory — TODO confirm the
# crawler is always launched from the directory containing them.
urls = open('start_urls.txt', 'r')
terms = open('search_terms.txt', 'a')

class TestSpider(BaseSpider):
  """Scrape the text of every in-paragraph link (//p/a) on each start
  URL of en.wikipedia.org and append it, one term per line, to
  search_terms.txt. Empty and purely numeric link texts (e.g. footnote
  references) are skipped."""
  name = "wiki"
  allowed_domains = ["en.wikipedia.org"]
  # One URL per line in start_urls.txt. rstrip('\n') instead of the
  # original [:-1], which silently truncated the last character of the
  # final URL whenever the file lacked a trailing newline.
  start_urls = [line.rstrip('\n') for line in urls if line.rstrip('\n')]

  def parse(self, response):
    """Extract //p/a link texts from the response and append the kept
    terms to search_terms.txt."""
    hxs = HtmlXPathSelector(response)
    links = hxs.select('//p/a')
    # Join the node's text fragments directly. The original sliced
    # str(extract()) with [3:-2] to peel "[u'" and "']" off the list
    # repr — that breaks on quotes/escapes in the text and on links
    # containing more than one text node.
    titles = [''.join(link.select('text()').extract()) for link in links]
    # Open the output per call instead of closing the shared module
    # level handle: the original called terms.close() here, so every
    # parse() after the first response failed on a closed file.
    with open('search_terms.txt', 'a') as out:
      for title in titles:
        title = title.strip()
        # Filter BEFORE appending the newline: the original tested
        # term.isdigit() on 'NNN\n', which is never True, so numeric
        # footnote links were never actually skipped.
        if title and not title.isdigit():
          out.write(title + '\n')
